StartAt <- Sys.time()

Document created on: r StartAt

knitr::opts_chunk$set(echo = FALSE,
                      fig.align = "center",
                      fig.keep  = "all"
                      )

# library(tidyr)
# library(dplyr)

library(hyperSpec)
# library(NMF)

# library(ggthemes)
library(xtable)
# library(GGally)
# library(corrplot)
library(pander)

library(spHelper)
library(spPlot)
library(knitrContainer)

library(rpart)
library(randomForest)
library(caret)
library(rattle)
library(rfPermute)
library(e1071)

library(doParallel)
cl <- makeCluster(7)
registerDoParallel(cl)

corrMethod <-  "spearman"

Klasifikacija

# load('2016-01-08.RData')
# load('2016-01-08 (nRuns = 40).RData')
# load("TD_2009 (3)komp.RData")

# load("TD_2009 (3)komp.RData")
load("D:/Dokumentai/R/Spektroskopija/TD_2009/Data/NMF 3 komp (outl).RData")
# Set random seed. Don't remove this line.
set.seed(123)

# k - number of folds
k = 3

# Initialize the accs vector
accs <- rep(0, k)

# Indices for training set
# indeksai cross-validacijai

# AMP_obj <- scoresPCAALS 
AMP_obj <- scoresNMF

# AMP_obj    <- AMP_obj[AMP_obj$gr!= "S",,]
AMP_obj$gr <- droplevels(AMP_obj$gr)

# AMP_obj$Safranin <- as.factor(AMP_obj$Safranin)
# 
# levels(AMP_obj$Safranin) <- c("0-1","0-1","2-3","2-3")
AMP_obj2  <-  AMP_obj;


# AMP_obj2$gr <- factor(AMP_obj2$Safranin)
# data("Spectra2")
# AMP_obj2 <- Spectra2
test_ind <- createFoldsBS(AMP_obj2$.., ID = "ID", gr = "gr",
                         k = k,
                         returnTrain = FALSE)


# i = 1
# 
# conf  = vector("list", k)
conf2 = vector("list", k)
    fitControl <- trainControl(
                           ## 10-fold CV
                           method = "cv",
                           # method = "repeatedcv",
                           number = 5,
                           ## repeated n times
                           repeats = 5,
                           p = 0.75, # training percentage

                           verboseIter = FALSE,
                           returnData = TRUE,
                           returnResamp = "all", #"final", #
                           classProbs = TRUE,
                           # savePredictions = FALSE,

                           selectionFunction = "best",
                           # search = "grid",


             # # preProcOptions = list(thresh = 0.95, ICAcomp = 3, k = 5),
             # sampling = NULL, # the type of additional sampling that is conducted after resampling (usually to resolve class imbalances)
             # index = NULL, #  a list with elements for each resampling
             #               #  iteration. Each list element is a vector of integers
             #               #  corresponding to the rows used for training at
             #               #  that iteration.
             # indexOut = NULL,
             # indexFinal = NULL,
             # predictionBounds = rep(FALSE, 2),
             # seeds = NA,
             # adaptive = list(min = 5, alpha = 0.05,
             #                 method = "gls", complete = TRUE),
             trim = FALSE,
             allowParallel = TRUE
                           )  
REZ <- knitrContainer()


for (i in 1:k) {

  REZ %<>% add_as_heading3(paste("Results of Fold", i))

  # Exclude them from the train set
  training <- AMP_obj[-test_ind[[i]],]

  # Include them in the test set
  test <- AMP_obj[test_ind[[i]],]

# =========================================================================
# Create Classification Model: Random Forests
# =========================================================================
  # A model is learned using each training set
  set.seed(1)
  # trained_RF <- train(gr ~  spc + Boos + Safranin + coll_1 + coll_2 + coll_oth,
  trained_RF <- train(gr ~  Safranin + coll_oth,
                       data = training,
                       metric = "Kappa",
                       # preProcess = c("center", "scale"),
                       method = 'parRF',
                       importance = TRUE,
                        proximity = TRUE,

                       ntree = 1e2,
                       tuneGrid  = data.frame(mtry  = 1:3),
                       trControl = fitControl
                     )

  REZ %<>% add_as_data(trained_RF)
  REZ %<>% add_as_is(tryCatch(ggplot(trained_RF)+ggLims(c(0,1)),
                              error = function(cond)return(NULL)) )

  REZ %<>% add_as_code(trained_RF)
  REZ %<>% add_as_code(predictors(trained_RF))

  REZ %<>% add_as_cmd(tryCatch(varImpPlot(trained_RF$finalModel,
                                          main = "Variable Importance Plot"),
                              error = function(cond)return(NULL)))

  REZ %<>% add_as_pander(importance(trained_RF$finalModel))

  # Make a prediction on the test set using randomForest
  pred <- predict(trained_RF, test)




  # Assign the confusion matrix to conf
  # conf <- table(test$gr, pred)
   conf <- table(test$gr, pred)

  # pander(conf)

  # print(confusionMatrix(test$gr, pred))
  REZ %<>% add_as_code(confusionMatrix(test$gr, pred)) 

  # Assign the accuracy of this model to the ith index in accs
  accs[i] <- sum(diag(conf))/sum(conf)

  # Add horizintal rule
  REZ %<>% add_as_text("***") 

}

Summary of Results


Average Kappa = r mean(accs)

# Print out the mean of accs
pander(data.frame(Fold = factor(c(1:k, 'vidutinis')),
                  Tikslumas = c(accs,mean(accs))))

Results for each fold

print_all(REZ)
stopCluster(cl)


GegznaV/spMisc documentation built on April 26, 2020, 5:59 p.m.